### ---- SCRIPT 06: The purpose of this script is to summarise the guest appearances on each programme. This includes --- ####
### ---- the proportion of guests on each programme by their category type, organisation, and organisation leaning. --- ####

# Heavily penalise scientific notation so numbers are printed in fixed notation
options(scipen = 999)

#### ---- LIBRARIES ---- ####

library(dplyr) # data wrangling
library(ggplot2) # data visualisation
library(data.table) # big data wrangling

#### ---- DIRECTORIES ---- ####

# Relative folder paths. Each ends in "/" so that file names can be appended
# with paste0(); they resolve against the current working directory.
raw_dir       <- "00-raw_data/"
processed_dir <- "01-processed_data/"
figure_dir    <- "02-figures/"

#### ---- LOAD DATA ---- ####

# Twitter user data
# NOTE(review): `user_data` is not referenced again in this script -- confirm
# whether the load is still needed.
user_data <- fread(
  paste0(processed_dir, "user_ideal_point_data_updated.csv")
)

# Guest masterlist (joined onto the guest appearances by guest name below)
guest_data <- fread(
  paste0(processed_dir, "username_master_list_with_ideal_points.csv")
)

# TV guest lists: read every file found in the guest_lists folder, giving a
# list with one table per file
guest_lists <- lapply(
  list.files(paste0(raw_dir, "guest_lists/"), full.names = TRUE),
  fread
)

#### ---- MERGE THE GUEST LISTS TOGETHER AND MATCH TO THEIR IDEAL POINTS ---- ####

# Stack the individual guest lists into a single table of appearances, then
# attach the masterlist columns to every appearance by matching the
# appearance's `Name` to the masterlist's `name`. Per the original author, the
# masterlist supplies each guest's ideal point as well as the ideal point of
# their organisation and organisation affiliates; the latter are only meant to
# be used where an individual has no ideal point of their own.
#
# A left join keeps every appearance; appearances with no masterlist match get
# NA in the masterlist columns.
# NOTE(review): rbindlist() is called with its defaults, so this assumes every
# guest-list file has the same columns in the same order -- confirm.
# NOTE(review): if a name occurs more than once in the masterlist, each matching
# appearance is duplicated and all counts below are inflated -- confirm that
# `name` is unique in `guest_data`.
guest_lists_merged <- guest_lists %>%
  rbindlist() %>%
  left_join(guest_data, by = c("Name" = "name"))

#### ---- SUMMARISE GUEST SELECTIONS ON EACH SHOW BY CATEGORY TYPE AND ORGANISATION ---- ####

# Overall number of guest appearances by category type, plus each type's share
# (in percent) of all appearances.
# (The object name keeps its original spelling so existing references work.)
guest_catergories_summary <- guest_lists_merged %>%
  group_by(category_type) %>%
  summarise(count = n(), .groups = "drop") %>%
  mutate(percentage = count / sum(count) * 100)

# Guest appearances per show and category type. `.groups = "drop_last"` keeps
# the result grouped by Show for the next step, so `sum(count)` is the show's
# total and `percentage` is the share of that show's guests in each category.
# Stating `.groups` explicitly avoids relying on (and being messaged about)
# summarise()'s default regrouping; the final ungroup() returns a plain table
# so later operations on it are not silently performed per show.
tv_show_guest_categories <- guest_lists_merged %>%
  group_by(Show, category_type) %>%
  summarise(count = n(), .groups = "drop_last") %>%
  mutate(percentage = count / sum(count) * 100) %>%
  ungroup()

# By organisation
#
# NOTE: the three recoding steps below overwrite `organisation` in
# `guest_lists_merged` in place, so the original organisation names are not
# available after this point.
# NOTE(review): rows whose organisation is NA are left as NA by the first two
# steps; in the last step they are folded into "Other" unless NA itself is one
# of the 30 most frequent values -- confirm how missing organisations should be
# treated.

# Relabel the "N/A" placeholder as "Unaffiliated"
guest_lists_merged$organisation <- with(
  guest_lists_merged,
  replace(organisation, organisation == "N/A", "Unaffiliated")
)

# Abbreviate the National Union of Rail, Maritime and Transport Workers to RMT
guest_lists_merged$organisation <- with(
  guest_lists_merged,
  ifelse(
    organisation == "National Union of Rail, Maritime and Transport Workers",
    "RMT",
    as.character(organisation)
  )
)

# Count appearances per organisation and keep the 30 most frequent, ordered
# from most to fewest appearances
organisations_top30 <- guest_lists_merged %>%
  group_by(organisation) %>%
  summarise(count = n()) %>%
  arrange(desc(count)) %>%
  head(n = 30)

# Fold every organisation outside the top 30 into a single "Other" category
guest_lists_merged$organisation <- with(
  guest_lists_merged,
  replace(
    organisation,
    !(organisation %in% organisations_top30$organisation),
    "Other"
  )
)

# Guest appearances per show and organisation, with each organisation's share
# (in percent) of that show's appearances and a running total of those shares
# within the show.
#
# Grouping is made explicit at every step: the counts are returned ungrouped
# (`.groups = "drop"`), the table is re-grouped by Show only for the within-show
# percentage / cumulative-sum calculation, and then ungrouped again so that the
# result (and the table derived from it below) is a plain table whose columns
# can be modified without any per-show grouping being carried along.
tv_show_organisation <- guest_lists_merged %>%
  group_by(Show, organisation) %>%
  summarise(count = n(), .groups = "drop") %>%
  arrange(Show) %>%
  group_by(Show) %>%
  mutate(
    percentage = count / sum(count) * 100,
    cum_perc = cumsum(percentage) # running total follows the row order above
  ) %>%
  ungroup()

# Keep only the columns needed for plotting
tv_show_organisation_coords <- tv_show_organisation %>%
  select(Show, organisation, percentage)

# Organisation order: the top-30 organisations (as ordered in
# `organisations_top30`) followed by "Other". unique() guards against a
# duplicated factor level -- which makes factor() fail -- should an organisation
# literally named "Other" already be among the top 30.
organisations_ordered <- unique(
  append(organisations_top30$organisation, "Other")
)

# The levels are reversed because a discrete y axis in ggplot2 places the first
# level at the bottom: reversing puts the first organisation at the top of the
# axis and "Other" at the bottom.
tv_show_organisation_coords$organisation <- factor(
  tv_show_organisation_coords$organisation,
  levels = rev(organisations_ordered)
)

# Fixed display order of the shows
show_order <- c(
  "BBC One: QT", "BBC One: SwLK", "BBC Two: PL",
  "ITV: Peston", "Channel 4: ANS", "Sky: SRoS",
  "GB News: CTS"
)

# factor() silently turns any value that is not among `levels` into NA, so a
# show that is missing from (or spelled differently to) the list above would be
# lost without notice. Warn and name such shows instead of failing silently.
unlisted_shows <- setdiff(
  as.character(tv_show_organisation_coords$Show),
  c(show_order, NA)
)
if (length(unlisted_shows) > 0) {
  warning(
    "Shows missing from the hard-coded plotting order will be set to NA: ",
    paste(unlisted_shows, collapse = ", "),
    call. = FALSE
  )
}

tv_show_organisation_coords$Show <- factor(
  tv_show_organisation_coords$Show,
  levels = show_order
)

# Heatmap with one tile per show (x) and organisation (y); the fill encodes the
# percentage of the show's guest appearances coming from that organisation.
# Show/organisation pairs that are absent from the data get no tile and so show
# the background.
# NOTE(review): no `limits` are set on the fill scale, so the white-grey-black
# gradient (with grey placed 10% of the way along) spans the observed range of
# `percentage`, not 0-100 -- the smallest observed share is drawn white. Confirm
# this is intended.
tv_orgs_heatmap <- ggplot(
  data = tv_show_organisation_coords,
  mapping = aes(x = Show, y = organisation, fill = percentage)
) +
  geom_tile() +
  scale_fill_gradientn(
    colours = c("white", "grey", "black"),
    values = c(0, 0.1, 1)
  ) +
  labs(x = "", y = "", fill = "Percent") +
  theme_minimal() +
  theme(
    panel.grid = element_blank(),
    axis.title = element_blank(),
    # Slant the show names so they do not overlap
    axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1)
  )

# Write the figure as a 7 x 7 inch, 300 dpi PNG on a white background
ggsave(
  filename = paste0(figure_dir, "guests_organisation_heatmap.png"),
  plot = tv_orgs_heatmap,
  width = 7, height = 7, units = "in", dpi = 300,
  bg = "white"
)

#### ---- END ---- ####
